rsubmit;
data delist; set crsp.mse
(keep = cusip date event dlret dlstcd);
where event="DELIST" and dlret<10 and 1962<year(date)<2021;
year=year(date); month=month(date); drop date;
data crsp; set crsp.msf
(keep = cusip date ret prc shrout);
where 1962<year(date)<2021;
year=year(date); month=month(date);
me=abs(prc)*shrout;
drop date prc shrout;
proc sort data=delist; by cusip year month;
proc sort data=crsp; by cusip year month;
data crsp; merge crsp (in=in1) delist (in=in2);
by cusip year month; if in1;
proc means; run;

data exch; set crsp.mse (keep = date cusip exchcd shrcd);
year=year(date); month=month(date); drop date;
proc sort data=exch; by cusip year month;
data crsp; merge crsp exch; by cusip year month;
data temp.crsp; set crsp; by cusip year month;
i=0; do while(i<2); lexchcd = lag(exchcd); lshrcd=lag(shrcd);
if cusip = lag(cusip) and exchcd = . then exchcd = lexchcd;
if cusip = lag(cusip) and shrcd = . then shrcd = lshrcd;
i+1; end; if last.month then output;
drop i lexchcd lshrcd;
proc sort data=temp.crsp nodupkey;
by cusip year month;
data temp.crsp; set temp.crsp;
if (nmiss(dlret)=1 or dlret=0) and 500<=dlstcd<600 and (exchcd ne 0 
and exchcd ne 3 and exchcd ne 33) then dlret=-0.3;
if (nmiss(dlret)=1 or dlret=0) and 500<=dlstcd<600 and
(exchcd=0 or exchcd=3 or exchcd=33) then dlret=-0.55;
if nmiss(ret)=1 and nmiss(dlret)=0 then return=dlret*100;
else return=ret*100; if shrcd ne 10 and shrcd ne 11 then delete;
drop event ret dlret dlstcd shrcd;
proc means; run;

rsubmit;
options ls=80;
data temp.daily; set crsp.dsf (keep = cusip date ret);
where 1962<year(date)<2021; proc sort; by date; run;
data index; set ff.factors_daily
(keep = date mktrf rf smb hml); proc sort; by date;
data temp.daily; merge temp.daily (in=in1) index;
by date; if in1; eret=(1+ret)/(1+rf)-1;
yearm=(year(date)-1963)*12+month(date);
proc sort; by cusip yearm; run;
proc means data=temp.daily; run;

rsubmit;
proc reg data=temp.daily noprint;
model eret = mktrf smb hml / edf;
by cusip yearm;	output out=temp.best
r=residual; proc means; run;

rsubmit;
data temp.best; set temp.best
(keep = cusip yearm residual);
proc sort; by cusip yearm;
proc means data=temp.best noprint;
var residual; by cusip yearm; 
output out=temp.ivol std(residual)=ivol;
data temp.ivol; set temp.ivol;
year=floor((yearm-0.01)/12)+1963;
month=yearm-(year-1963)*12;
if _freq_<15 then delete;
drop _type_ yearm; proc means; run;

rsubmit;
data cap; set temp.crsp (keep = cusip
year month me exchcd); where year>1962;
proc sort data=cap; by year month;
proc univariate data=cap noprint;
var me; by year month; where exchcd=1;
output out=decile pctlpts = 10 to 90 by 10 pctlpre=dec;
data cap; merge cap decile; by year month;
if me<dec10 then decile=1; else decile=0;
yearm=(year-1963)*12+month;
drop dec10 dec20 dec30 dec40 dec50 dec60 dec70 dec80 dec90;
proc sort; by cusip yearm;
data sdecile; set cap (keep = cusip yearm decile);
yearm=yearm+1; sdecile=decile; drop decile;
proc sort; by cusip yearm;
data cap; merge cap sdecile; by cusip yearm;
proc sort; by cusip year month;
data ivol; merge cap temp.ivol; by cusip year month;
if sdecile=1 then delete; drop decile sdecile yearm;
proc sort data=ivol; by year month;
proc means data=ivol noprint; var ivol;
by year month; output out=mivol mean(ivol)=mivoltr;
data mivol; set mivol; drop _freq_ _type_;
if nmiss(mivoltr) then delete;
data ivol; set temp.ivol; proc sort; by year month;
proc means data=ivol noprint; var ivol;
by year month; output out=mivol0 mean(ivol)=mivol;
proc means data=mivol0; run;
data mivol0; set mivol0; drop _freq_ _type_;
if nmiss(mivol) then delete;
data mivol; merge mivol mivol0; by year month;
proc means data=mivol; run;
proc download data=mivol
out=sasuser.mivol (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.MIVOL 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2021.xls"
DBMS=EXCEL REPLACE; SHEET="mivol21"; RUN;

rsubmit;
options nosource nonotes errors=0;
%let begyear=13; %let endyear=696;
%macro makebeta(st);
%do myear=%eval(&begyear) %to %eval(&endyear);;
data perms; set temp.daily (keep=cusip yearm);
if yearm eq %eval(&myear); proc sort nodupkey; by cusip;
data sub; set temp.daily; if yearm ge %eval(&myear-11)
and yearm le %eval(&myear); proc sort; by cusip;
proc sql; create table regdata as select
sub.cusip, sub.eret, sub.mktrf, sub.smb, sub.hml
from perms, work.sub where perms.cusip = sub.cusip; quit;
proc sort data=regdata; by cusip; 
proc reg data=regdata noprint;
model eret = mktrf smb hml / edf;
by cusip; output out=temp.best&myear r=residual;
data temp.best&myear; set temp.best&myear;
yearm = %eval(&myear); keep cusip yearm residual;
proc means data=temp.best&myear noprint;
var residual; by cusip; 
output out=temp.ivol&myear std(residual)=ivol;
proc delete data=temp.best&myear;
data temp.ivol&myear; set temp.ivol&myear;
if _freq_<120 then delete; drop _freq_ _type_;
run; %end; %mend; %makebeta(1);

rsubmit;
options nosource nonotes errors=0;
%let begyear=13; %let endyear=696;
%macro makebeta(st);
%do myear=%eval(&begyear) %to %eval(&endyear);;
data ivol&myear; set temp.ivol&myear; yearm=%eval(&myear);
run; %end; %mend; %makebeta(1);
%macro makebeta(st); data temp.ivolann; set 
%do myear=%eval(&begyear) %to %eval(&endyear);
ivol&myear %end;; %mend; %makebeta(1);
data temp.ivolann; set temp.ivolann;
year=floor((yearm-0.01)/12)+1963;
month=yearm-(year-1963)*12; drop yearm;
proc sort data=temp.ivolann;
by cusip year month; proc means; run;

rsubmit;
data cap; set temp.crsp (keep = cusip
year month me exchcd); where year>1962;
proc sort data=cap; by year month;
proc univariate data=cap noprint;
var me; by year month; where exchcd=1;
output out=decile pctlpts = 10 to 90 by 10 pctlpre=dec;
data cap; merge cap decile; by year month;
if me<dec10 then decile=1; else decile=0;
yearm=(year-1963)*12+month;
drop dec10 dec20 dec30 dec40 dec50 dec60 dec70 dec80 dec90;
proc sort; by cusip yearm;
data sdecile; set cap (keep = cusip yearm decile);
yearm=yearm+1; sdecile=decile; drop decile;
proc sort; by cusip yearm;
data cap; merge cap sdecile; by cusip yearm;
proc sort; by cusip year month;
data ivol; merge cap temp.ivolann; by cusip year month;
if sdecile=1 then delete; drop decile sdecile yearm;
proc sort data=ivol; by year month;
proc means data=ivol noprint; var ivol;
by year month; output out=mivol mean(ivol)=mivoltr;
data mivol; set mivol; drop _freq_ _type_;
if nmiss(mivoltr) then delete;
data ivol; set temp.ivolann; proc sort; by year month;
proc means data=ivol noprint; var ivol;
by year month; output out=mivol0 mean(ivol)=mivol;
proc means data=mivol0; run;
data mivol0; set mivol0; drop _freq_ _type_;
if nmiss(mivol) then delete;
data mivol; merge mivol mivol0; by year month;
proc means data=mivol; run;
proc download data=mivol
out=sasuser.mivol (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.MIVOL 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2021.xls"
DBMS=EXCEL REPLACE; SHEET="mivolann"; RUN;

rsubmit;
data cap; set temp.crsp (keep = cusip year month me);
proc sort data=cap; by cusip year month;
data cap; set cap; lcusip=lag(cusip); lme=lag(me);
if lcusip ne cusip then lme=.; drop me;
data ivol; merge cap temp.ivol; by cusip year month;
if year<1963 then delete; proc sort; by year month;
proc means data=ivol noprint; var ivol; by year month;
weight lme; output out=mivol mean(ivol)=mivol; proc means; run;
data mivol; set mivol; drop _freq_ _type_; if nmiss(mivol) then delete;
proc download data=mivol out=sasuser.mivol (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.MIVOL 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2018.xls"
DBMS=EXCEL REPLACE; SHEET="mivol18vw"; RUN;

rsubmit;
data temp.daily; set crsp.dsf (keep = cusip date ret);
where 1962<year(date)<2018; proc sort; by cusip date;
data temp.daily; set temp.daily; lcusip=lag(cusip);
lret=lag(ret); if cusip ne lcusip then lret=.;
year=year(date); month=month(date); proc means; run;

rsubmit;
data temp.daily; set temp.daily;
varret=ret*ret+2*ret*lret; run;
proc sort; by cusip year month;
proc means data=temp.daily noprint;
var varret; by cusip year month; output out=temp.avvar
mean(varret) = avvarmean sum(varret) = avvarsum;
data temp.avvar; set temp.avvar; where _freq_>14;
drop _freq_ _type_; proc means; run;

rsubmit;
data cap; set temp.crsp (keep = cusip
year month me exchcd); where year>1962;
proc sort data=cap; by year month;
proc univariate data=cap noprint;
var me; by year month; where exchcd=1;
output out=decile pctlpts = 10 to 90 by 10 pctlpre=dec;
data cap; merge cap decile; by year month;
if me<dec10 then decile=1; else decile=0;
yearm=(year-1963)*12+month;
drop dec10 dec20 dec30 dec40 dec50 dec60 dec70 dec80 dec90;
proc sort; by cusip yearm;
data sdecile; set cap (keep = cusip yearm decile);
yearm=yearm+1; sdecile=decile; drop decile;
proc sort; by cusip yearm;
data cap; merge cap sdecile; by cusip yearm;
proc sort; by cusip year month;
data ivol; merge cap temp.avvar; by cusip year month;
if sdecile=1 or avvarsum<0 then delete;
drop decile sdecile yearm; proc sort; by year month;
proc means data=ivol noprint; var avvarsum;
by year month; output out=mivol mean(avvarsum)=mavvar;
proc means data=mivol; run;
data mivol; set mivol; drop _freq_ _type_;
if nmiss(mavvar) then delete;
proc download data=mivol
out=sasuser.mavvar (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.MAVVAR 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2018.xls"
DBMS=EXCEL REPLACE; SHEET="mavvar18"; RUN;

rsubmit;
data cap; set temp.crsp (keep = cusip year month me);
proc sort data=cap; by cusip year month;
data cap; set cap; lcusip=lag(cusip); lme=lag(me);
if lcusip ne cusip then lme=.; drop me;
data avvar; merge cap temp.avvar; by cusip year month;
if year<1963 or avvarsum<0 then delete; proc sort; by year month;
proc means data=avvar noprint; var avvarsum; by year month;
weight lme; output out=mavvar mean(avvarsum)=mavvar; proc means; run;
data mavvar; set mavvar; drop _freq_ _type_; if nmiss(mavvar) then delete;
proc download data=mavvar out=sasuser.mavvar (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.Mavvar 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2018.xls"
DBMS=EXCEL REPLACE; SHEET="mavvar18vw"; RUN;

rsubmit;
data temp.daily; set crsp.dsf (keep = cusip date ret);
where 1962<year(date)<2018; year=year(date);
month=month(date); drop date;
proc sort; by cusip year month; proc means; run;
proc means data=temp.daily noprint;
var ret; by cusip year month;
output out=temp.tvol var(ret)=tvol;
proc sort; by cusip year month;
data temp.tvol; set temp.tvol;
where _freq_ ge 15; drop _type_ _freq_;
proc means; run;
data cap; set temp.crsp (keep = cusip
year month me exchcd); where year>1962;
proc sort data=cap; by year month;
proc univariate data=cap noprint;
var me; by year month; where exchcd=1;
output out=decile pctlpts = 10 to 90 by 10 pctlpre=dec;
data cap; merge cap decile; by year month;
if me<dec10 then decile=1; else decile=0;
yearm=(year-1963)*12+month;
drop dec10 dec20 dec30 dec40 dec50 dec60 dec70 dec80 dec90;
proc sort; by cusip yearm;
data sdecile; set cap (keep = cusip yearm decile);
yearm=yearm+1; sdecile=decile; drop decile;
proc sort; by cusip yearm;
data cap; merge cap sdecile; by cusip yearm;
proc sort; by cusip year month;
data tvol; merge cap temp.tvol; by cusip year month;
if sdecile=1 then delete; drop decile sdecile yearm;
proc sort data=tvol; by year month;
proc means data=tvol noprint; var tvol;
by year month; output out=temp.mtvol
mean(tvol)=mtvol; proc means; run;
data temp.mtvol; set temp.mtvol;
if nmiss(mtvol) then delete;
drop _freq_ _type_; proc means; run;
proc download data=temp.mtvol
out=sasuser.mtvol (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.MTVOL 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2018.xls"
DBMS=EXCEL REPLACE; SHEET="mtvol18"; RUN;

rsubmit;
data cap; set temp.crsp (keep = cusip year month me);
proc sort data=cap; by cusip year month;
data cap; set cap; lcusip=lag(cusip); lme=lag(me);
if lcusip ne cusip then lme=.; drop me;
data tvol; merge cap temp.tvol; by cusip year month;
if year<1963 then delete; proc sort; by year month;
proc means data=tvol noprint; var tvol; by year month;
weight lme; output out=mtvol mean(tvol)=mtvol; proc means; run;
data mtvol; set mtvol; drop _freq_ _type_; if nmiss(mtvol) then delete;
proc download data=mtvol out=sasuser.mtvol (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.Mtvol 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2018.xls"
DBMS=EXCEL REPLACE; SHEET="mtvol18vw"; RUN;

rsubmit;
options ls=80;
data temp.daily; set crsp.dsf (keep = cusip date ret);
where 1962<year(date)<2018; proc sort; by date; run;
data index; set ff.factors_daily
(keep = date mktrf rf); proc sort; by date;
data temp.daily; merge temp.daily (in=in1) index;
by date; if in1; eret=(1+ret)/(1+rf)-1;
yearm=(year(date)-1963)*12+month(date);
proc sort; by cusip yearm; run;
proc means data=temp.daily; run;

rsubmit;
proc reg data=temp.daily noprint;
model eret = mktrf / edf;
by cusip yearm;	output out=temp.best
r=residual; proc means; run;

rsubmit;
data temp.best; set temp.best
(keep = cusip yearm residual);
proc sort; by cusip yearm;
proc means data=temp.best noprint;
var residual; by cusip yearm; 
output out=temp.ivolcapm std(residual)=ivol;
data temp.ivolcapm; set temp.ivolcapm;
year=floor((yearm-0.01)/12)+1963;
month=yearm-(year-1963)*12;
if _freq_<15 then delete;
drop _type_ _freq_ yearm; proc means; run;

rsubmit;
data cap; set temp.crsp (keep = cusip year month me);
proc sort data=cap; by cusip year month;
data cap; set cap; lcusip=lag(cusip); lme=lag(me);
if lcusip ne cusip then lme=.; drop me;
data ivol; merge temp.ivolm cap; by cusip year month;
proc sort; by year month; proc means; run;
proc means data=ivol noprint; var ivol; by year month;
weight lme; output out=temp.mivol mean(ivol) = mivol;
data temp.mivol; set temp.mivol; drop _freq_ _type_;
if nmiss(mivol) then delete; proc means; run;
proc download data=temp.mivol
out=sasuser.factor25 (replace=yes); run;
endrsubmit;
PROC EXPORT DATA= SASUSER.FACTOR25 
OUTFILE= "C:\Sasha\Idiosyncratic Volatility\IVol Factor\ivol data.xls"
DBMS=EXCEL REPLACE; SHEET="ivoltscapm"; RUN;
